
RNGkind("L'Ecuyer-CMRG")
set.seed(1234)

##load the data and results files
load(paste0(getwd(), "/EMestimates.Rda"))
load(paste0(getwd(), "/EMestimates_boot.Rda"))
load(paste0(getwd(), "/EMestimates_placebo.Rda"))
load("speeches.Rda")
load("y.Rda")
load("all_data.Rda")



##load auxiliary data
incdata <- import(paste0(getwd(), "/DisposableIncome.csv"))
pres_app <- import(paste0(getwd(),"/pressapp.csv"))
commonspace <- import(paste0(getwd(), "/Hall_members.csv"))
party_unity <- read.dta(paste0(getwd(), "/PartyUnity.dta"))
freedom_caucus <- import(paste0(getwd(), "/FreedomCaucus.xlsx"))
vote_against_boehner <- import(paste0(getwd(), "/VoteAgainstBoehner.csv"))
chal_data <- import(paste0(getwd(), "/ChallengerData_1990on.csv"))
vote_desc <- import(paste0(getwd(), "/Hall_rollcalls.csv"))
vm <- readRDS(paste0(getwd(), "/MVreplication.RDS"))





##calculate party loyalty baseline rate for my model
baselines <-
  all_data %>% 
  group_by(cong,pid,rollcall) %>%
  dplyr::mutate(party_pos = median(vote, na.rm = TRUE)) %>%
  ungroup() %>%
  group_by(cong,id) %>%
  dplyr::summarise(baseline_rate_irt = 100*mean((vote == party_pos)[leader_speech==0], na.rm = TRUE)) %>%
  ungroup()

#transform results
mx <- mean(results$x.current)
sx <- sd(results$x.current)
results$x.current <- (results$x.current-mx)/sx
results$al.current <- results$al.current - results$be.current*mx
results$be.current <- results$be.current*sx

x.current <- results$x.current
al.current <- results$al.current
be.current <- results$be.current
de.current <- results$de.current

##calculate percent correctly predicted
times <- as.numeric(gsub("-.*","",colnames(y)))-min(as.numeric(gsub("-.*","",colnames(y))))+1
N <- nrow(y)
J <- ncol(y)

mu <- -matrix(rep(al.current,N),N,J,byrow=TRUE) +
  outer(x.current,be.current) +
  as.matrix(de.current[,times]*speeches)

pr_new <- pnorm(mu)
y_pred <- (pnorm(mu)>0.5)*1
colnames(y_pred) <- colnames(y)
comp_tab <- sum(y==y_pred,na.rm=TRUE)/(sum(y==y_pred,na.rm=TRUE) + sum(y!=y_pred,na.rm=TRUE))


##use bootstrap data to calculate uncertainties
results_boot[results_boot==0] <- NA
bmeans <- colMeans(results_boot,na.rm=TRUE)
blow <- apply(results_boot,2,quantile,0.05,na.rm=TRUE)
bhigh <- apply(results_boot,2,quantile,0.95,na.rm=TRUE)
bsd <- apply(results_boot,2,sd,na.rm=TRUE)
blow <- bmeans - 1.65*bsd
bhigh <- bmeans + 1.65*bsd

##calculate errors
err_new <- (y!=y_pred)*1
leg_err_new <- NULL
for (k in 1:13){
  tmp_errors <- rowSums(err_new[,times==k],na.rm = TRUE)
  all_na <- rowSums(is.na(err_new[,times==k]),na.rm = TRUE)
  tmp_errors[all_na==ncol(is.na(err_new[,times==k]))] <- NA
  leg_err_new <- c(leg_err_new, tmp_errors)
}

##we're going to do the same with baseline IRT

# ## Convert data and make starts/priors for estimation
rc <- convertRC(rollcall(y))
p <- makePriors(rc$n, rc$m, 1)
s <- getStarts(rc$n, rc$m, 1)

set.seed(1234)
s$x[,1] <- rnorm(nrow(s$x))


baseIRT <- binIRT(.rc = rc,
                  .starts = s,
                  .priors = p,
                  .control = {
                    list(threads = 1,
                         verbose = FALSE,
                         thresh = 1e-6
                    )
                  }
)



#calculate the pcp for the base model
mx <- mean(baseIRT$means$x[,1])
sx <- sd(baseIRT$means$x[,1])
baseIRT$means$x[,1] <- (baseIRT$means$x[,1]-mx)/sx
baseIRT$means$beta[,1] <- baseIRT$means$beta[,1] + baseIRT$means$beta[,2]*mx
baseIRT$means$beta[,2] <- baseIRT$means$beta[,2]*sx
mu_base <- matrix(rep(baseIRT$means$beta[,1],N),N,J,byrow=TRUE) +
  outer(baseIRT$means$x[,1],baseIRT$means$beta[,2])

pr_base <- pnorm(mu_base)
y_pred_base <- (pnorm(mu_base)>0.5)*1
comp_tab_base <- sum(y==y_pred_base,na.rm=TRUE)/(sum(y==y_pred_base,na.rm=TRUE) + sum(y!=y_pred_base,na.rm=TRUE))

colnames(y_pred_base) <- colnames(y)

#figure out errors across both models
err_base <- (y!=y_pred_base)*1

leg_err_base <- NULL
for (k in 1:13){
  tmp_errors <- rowSums(err_base[,times==k],na.rm = TRUE)
  all_na <- rowSums(is.na(err_base[,times==k]),na.rm = TRUE)
  tmp_errors[all_na==ncol(is.na(err_base[,times==k]))] <- NA
  leg_err_base <- c(leg_err_base, tmp_errors)
}

##calculate log-posterior across both models

log_lik_base <- (y*pnorm(mu_base,log.p = TRUE) + (1-y)*(pnorm(-mu_base,log.p = TRUE)))
log_lik_new <- (y*pnorm(mu,log.p = TRUE) + (1-y)*(pnorm(-mu,log.p = TRUE)))

AIC_base <- 2*(length(al.current)+length(be.current)+length(x.current))-2*sum(log_lik_base, na.rm=TRUE)
AIC_new <- 2*(length(al.current)+length(be.current)+length(x.current)+length(de.current))-2*sum(log_lik_new, na.rm=TRUE)

##compare aggregate fit across all models

GMP_total_new <- exp(sum(rowSums(log_lik_new,na.rm=TRUE))/(sum(!is.na(y))))
GMP_total_base <- exp(sum(rowSums(log_lik_base,na.rm=TRUE))/(sum(!is.na(y))))

##calculate the PRE across both models in aggregate

diff_base <- 0
diff_new <- 0
votes_err_base <- colSums(err_base,na.rm = TRUE)
votes_err_new <- colSums(err_new,na.rm = TRUE)
minority_vote <- rep(0,ncol(y))
for (k in 1:ncol(y)){
  minority_vote[k] <- min(table(y[,k]))
  diff_base <- diff_base + (minority_vote[k]-votes_err_base[k])
  diff_new <-  diff_new  + (minority_vote[k]-votes_err_new[k])
}

APRE_base_tot <- diff_base/sum(minority_vote)
APRE_new_tot <- diff_new/sum(minority_vote)



##put together with in a data frame
full_results <- data.frame(id=rep(as.numeric(rownames(y)),13), cong=rep(101:113,each=N),
                           ideals=rep(x.current,13), ideal_low=rep(blow[1:N],13), ideal_high=rep(bhigh[1:N],13),
                           loyalty=100*pnorm(c(de.current)),
                           loyalty_lag=c(rep(NA,N),100*pnorm(bmeans[(N+1):(length(bmeans)-N)])),
                           loyalty_low=100*pnorm(blow[(N+1):length(blow)]),
                           ideals_base=rep(baseIRT$means$x,13),
                           loyalty_high=100*pnorm(bhigh[(N+1):length(bhigh)]), 
                           errors_new=leg_err_new, 
                           errors_base=leg_err_base
)

full_results$loyalty[is.na(full_results$loyalty)] <- NA
full_results$loyalty_lag[is.na(full_results$loyalty_lag)] <- NA
full_results$loyalty_low[is.na(full_results$loyalty)] <- NA
full_results$loyalty_high[is.na(full_results$loyalty)] <- NA

##try to merge with NOMINATE data
commonspace$idcong <- paste(commonspace$icpsr,"-",commonspace$congress,sep="")
full_results$idcong <- paste(full_results$id,"-",full_results$cong,sep="")
all_together <- merge(full_results,commonspace,by.x="idcong",by.y="idcong")

##create majority party variable
MajorityParty <- rep(1,nrow(all_together))
MajorityParty <- replace(MajorityParty,
                         all_together$party_code==100 & all_together$congress>=104 & all_together$congress<=109,
                         0)
MajorityParty <- replace(MajorityParty,
                         all_together$party_code==100 & all_together$congress>=112,
                         0)
MajorityParty <- replace(MajorityParty,
                         all_together$party_code==200 & all_together$congress<104,
                         0)
MajorityParty <- replace(MajorityParty,
                         all_together$party_code==200 & all_together$congress>=110 & all_together$congress<=111,
                         0)

all_together$MajorityParty <- MajorityParty

##add in party unity and stuff

party_means <- ddply(all_together,.(congress,party_code),summarise,pmean=mean(ideals,na.rm=TRUE))
party_means$congress_party <- paste(party_means$congress,party_means$party_code,sep="-")
party_means <- party_means[,3:4]
all_together$congress_party <- paste(all_together$congress,all_together$party_code,sep="-")
all_together <- merge(all_together,party_means)

party_unity$idcong <- paste(party_unity$id,party_unity$cong,sep="-")
party_unity$party_unity <- party_unity$party_unity#/100
party_unity$party_unity_lag <- party_unity$party_unity_lag#/100

all_together <- merge(all_together, party_unity)

##next, we calculate party means and chamber means
chamber_sum <- ddply(all_together,.(congress), summarise, cmean=mean(ideals))
all_together <- merge(all_together,chamber_sum)
all_together$pdist <- abs(all_together$ideals-all_together$pmean)
all_together$cdist <- abs(all_together$ideals-all_together$cmean)



##find loyalist and rebel factions
all_together$significant_party_effect <- 
  (all_together$loyalty_high > 50 & all_together$loyalty_low > 50)*1 +
  (all_together$loyalty_high < 50 & all_together$loyalty_low < 50)*1
all_together$faction <- "Preferentialists"
all_together$faction[all_together$significant_party_effect==1 & all_together$loyalty>50] <- "Loyalists"
all_together$faction[all_together$significant_party_effect==1 & all_together$loyalty<50] <- "Rebels"

loyalty_summary <- ddply(all_together, .(congress,party_code), summarise,
                         Preferentialists=sum(significant_party_effect == 0, na.rm=TRUE)/length(ideals),
                         Loyalists=sum(significant_party_effect == 1 & loyalty>50, na.rm=TRUE)/length(ideals),
                         Rebels=sum(significant_party_effect == 1 & loyalty<50, na.rm=TRUE)/length(ideals))

loyalty_summary <- reshape2::melt(loyalty_summary, id.vars = c("congress","party_code"))
colnames(loyalty_summary)[3] <- "Faction"
loyalty_summary$value <- loyalty_summary$value

loyalty_summary <- ddply(loyalty_summary, .(congress, party_code),
                         transform, pos = cumsum(value) )

loyalty_summary$Party <- "Republicans"
loyalty_summary$Party[loyalty_summary$party_code==100] <- "Democrats"


all_together <- ddply(all_together, .(congress,party_code), transform,
                      loyaltyRank = rank(loyalty), unityRank = rank(party_unity))

##let's also examine the relationship between majority and minority party status

all_together <- 
  all_together %>% 
  group_by(cong, party_code) %>% 
  mutate(ideal_party_mean = mean(ideals, na.rm = TRUE)) %>%
  ungroup()


##Now we, look at error rates across votes

speech_base_errors <- votes_err_base[apply(speeches,2,sum)>0]
speech_new_errors <- votes_err_new[apply(speeches,2,sum)>0]
names(votes_err_base) <- colnames(y)
names(votes_err_new) <- colnames(y)

##load roll calls to see which ones are related to the errors
vote_desc <- subset(vote_desc, congress %in% 101:113)
vote_desc$votenum <- paste(vote_desc$congress,"-",vote_desc$rollnumber,sep="")

votes <- vote_desc$vote_desc
votes[votes==""] <- vote_desc$dtl_desc[votes==""]
names(votes) <- vote_desc$votenum

errors_data <- data.frame(error_difference=votes_err_base-votes_err_new,description=votes[names(votes_err_base)],
                          cong=as.numeric(gsub("-.*","",names(votes_err_base))))

vote_margins <- (colSums(y,na.rm=TRUE)-colSums(y==0,na.rm=TRUE))

largest_errors <- (subset(errors_data, error_difference>20))


colnames(pr_new) <- colnames(pr_base) <- colnames(y)

#let's examine predicted successes and failures
actual_yea <- colSums(y==1,na.rm=TRUE)
actual_nay <- colSums(y==0,na.rm=TRUE)
correct_outcome <- (actual_yea > actual_nay)*1

base_yea <- colSums(y_pred_base==1,na.rm=TRUE)
base_nay <- colSums(y_pred_base==0,na.rm=TRUE)
base_outcome <- (base_yea > base_nay)*1

new_yea <- colSums(y_pred==1,na.rm=TRUE)
new_nay <- colSums(y_pred==0,na.rm=TRUE)
new_outcome <- (new_yea > new_nay)*1



##calculate the PRE across both models by year
APRE_base <- rep(NA,13)
APRE_new <- rep(NA,13)

for (congs in 1:13){
  y_tmp <- y[,times==congs]
  votes_err_base_tmp <- votes_err_base[colnames(y_tmp)]
  votes_err_new_tmp <- votes_err_new[colnames(y_tmp)]
  minority_vote <- rep(NA,ncol(y_tmp))
  diff_base <- 0
  diff_new <- 0
  for (k in 1:ncol(y_tmp)){
    minority_vote[k] <- min(table(y_tmp[,k]))
    diff_base <- diff_base + (minority_vote[k]-votes_err_base_tmp[k])
    diff_new <-  diff_new  + (minority_vote[k]-votes_err_new_tmp[k])
  }
  
  APRE_base[congs] <- diff_base/sum(minority_vote)
  APRE_new[congs] <- diff_new/sum(minority_vote)
}

##calculate GMPs across both models
GMP_base <- NULL
GMP_new <- NULL
for (k in 1:13){
  tmp_gmp_base <- exp(sum(rowSums(log_lik_base[,times==k],na.rm=TRUE))/(sum(!is.na(y[,times==k]))))
  tmp_gmp_new <-  exp(sum(rowSums(log_lik_new[,times==k],na.rm=TRUE))/(sum(!is.na(y[,times==k]))))
  GMP_base <- c(GMP_base, tmp_gmp_base)
  GMP_new <- c(GMP_new, tmp_gmp_new)
}



#####################################
###Now add Jacobson's data
######################################

#need to convert state fips to match jacobson
all_together$statenum <- NA
for (i in 1:nrow(all_together)) all_together$statenum[i] <- which(all_together$state_abbrev[i]==state.abb)
all_together$elyr <- (1789+2*(all_together$congress-1))+1
all_together$district_code_rev <- all_together$district_code
all_together$district_code_rev[nchar(all_together$district_code_rev)==1] <- paste("0",all_together$district_code_rev[nchar(all_together$district_code_rev)==1],sep="")
all_together$stcdyr <- paste(as.numeric(paste(all_together$statenum,all_together$district_code_rev,sep="")),
                             all_together$elyr,sep="-")
chal_data$stcdyr <- paste(chal_data$stcd,chal_data$year,sep="-")

all_together3 <- merge(all_together, chal_data, by.x = "stcdyr", by.y = "stcdyr")


#now we need to figure out the incumbent voteshare
all_together3$inc_vote <- NA
all_together3$inc_vote[all_together3$inc==0] <- all_together3$rv[all_together3$inc==0]
all_together3$inc_vote[all_together3$inc==1] <- all_together3$dv[all_together3$inc==1]

#and its lag
all_together3$inc_vote_lag <- NA
all_together3$inc_vote_lag[all_together3$inc==0] <- all_together3$rvp[all_together3$inc==0]
all_together3$inc_vote_lag[all_together3$inc==1] <- all_together3$dvp[all_together3$inc==1]


#now we need to figure out the incumbent vs chal spending
all_together3$inc_spending_adv <- NA
all_together3$inc_spending_adv[all_together3$inc==0] <- (log(all_together3$rexp+1)-log(all_together3$dexp+1))[all_together3$inc==0]
all_together3$inc_spending_adv[all_together3$inc==1] <- (log(all_together3$dexp+1)-log(all_together3$rexp+1))[all_together3$inc==1]

#quality challenger present?
all_together3$qual_chal <- NA
all_together3$qual_chal[all_together3$po1==1] <- 1
all_together3$qual_chal[all_together3$po1==0] <- 0

#normalized pres voteshare
all_together3$presvote <- NA
all_together3$presvote[all_together3$inc==0] <- 100-all_together3$dpres[all_together3$inc==0]
all_together3$presvote[all_together3$inc==1] <- all_together3$dpres[all_together3$inc==1]

#freshman?
all_together3$freshman - NA
all_together3$freshman[all_together3$fr %in% c(1,2,3)] <- 1
all_together3$freshman[all_together3$fr == 0] <- 0

#midterm year?
all_together3$presparty <- (all_together3$elyr %in% c(1990,1992,2002:2008) & all_together3$party_code==200)*1 +
  (all_together3$elyr %in% c(1994:2000,2010:2014) & all_together3$party_code==100)*1
all_together3$midterm <- 0
all_together3$midterm[all_together3$elyr %in% c(1990,2002,2006) & all_together3$party_code==100] <- -1
all_together3$midterm[all_together3$elyr %in% c(1990,2002,2006) & all_together3$party_code==200] <- 1
all_together3$midterm[all_together3$elyr %in% c(1994,1998,2010,2014) & all_together3$party_code==100] <- 1
all_together3$midterm[all_together3$elyr %in% c(1994,1998,2010,2014) & all_together3$party_code==200] <- -1

#need to add real disposable income and presidential approval
elyr_income <- data.frame(elyr=seq(1990,2014,by=2), income_change=rep(NA,length(seq(1990,2014,by=2))))
counter <- 1
for (yr in seq(1990,2014,by=2)){
  tmp_income <- incdata[grep(paste("/",substr(yr,3,4),sep=""),incdata$DATE),]
  income_change <- 100*(tmp_income$DSPIC96[6]-tmp_income$DSPIC96[1])/tmp_income$DSPIC96[1]
  elyr_income$income_change[counter] <- income_change
  counter <- counter + 1
}

inc_with_pop <- data.frame(elyr_income, pres_approval=pres_app$julypop)

all_together3 <- merge(all_together3, inc_with_pop)

#need to recode presidential approval to be party specific
all_together3$pres_pty_approval <- all_together3$pres_approval-50
all_together3$pres_pty_approval[all_together3$elyr %in% c(1990,1992,2002,2004,2006,2008) & all_together3$party_code==100] <- -1*all_together3$pres_pty_approval[all_together3$elyr %in% c(1990,1992,2002,2004,2006,2008) & all_together3$party_code==100]
all_together3$pres_pty_approval[all_together3$elyr %in% c(1994,1996,1998,2000,2010,2012,2014) & all_together3$party_code==200] <- -1*all_together3$pres_pty_approval[all_together3$elyr %in% c(1994,1996,1998,2000,2010,2012,2014) & all_together3$party_code==200]



##now going to merge this with other approval data
party_unity$idcong <- paste(party_unity$id,party_unity$cong,sep="-")
party_unity$party_unity <- party_unity$party_unity#/100
party_unity$party_unity_lag <- party_unity$party_unity_lag#/100

all_together4 <- merge(all_together3, party_unity)







##add in volden-minozzi data
vm$icpsr <- vm$icpsrLegis
vm$idcong <- paste(vm$icpsr,vm$cong,sep="-")
# exclud_cols <- which(colnames(vm)  %in% c("icpsr","congress"))

all_together5 <- merge(all_together, vm, by = c("icpsr","congress"), all.x = TRUE)
all_together5$ideological_extremism_new <- all_together5$ideals
all_together5$ideological_extremism_new[all_together5$party_code==100] <- -all_together5$ideals[all_together5$party_code==100]

all_together5$congress_party <- paste(all_together5$congress,all_together5$party_code,sep="-")

party_means <- ddply(all_together5,.(congress_party),summarise,pmean=mean(ideals,na.rm=TRUE))
chamber_sum <- ddply(all_together5,.(congress), summarise, cmean=mean(ideals,na.rm=TRUE))

all_together5$pdist <- abs(all_together5$ideals-all_together5$pmean)
all_together5$cdist <- abs(all_together5$ideals-all_together5$cmean)

all_together5 <- merge(all_together5, baselines, by = c("id","cong"), all.x = TRUE)

##load in data to find out who is in the FC and who voted against Boehner
all_together5$freedom_caucus <- 0
all_together5$vote_against_boehner <- 0

for (i in 1:nrow(freedom_caucus)){
  all_together5$freedom_caucus[grep(freedom_caucus$icpsr[i],all_together5$icpsr)] <- 1
}

for (i in 1:nrow(vote_against_boehner)){
  all_together5$vote_against_boehner[grep(vote_against_boehner$icpsr[i],all_together5$icpsr)] <- 1
}

##focus on the 113th Congress Republicans and look at Party loyalty measured two ways
thiscong <- subset(all_together5, congress==113 & party_code==200)

#we also need to remove members who didn't return
icpsr_keep <- NULL
for (i in 1:nrow(thiscong)){
  if(length(grep(thiscong$icpsr[i] ,subset(commonspace, congress==114)$icpsr))!=0){
    icpsr_keep <- c(icpsr_keep, thiscong$icpsr[i])
  }
}

thiscong <- subset(thiscong, icpsr %in% icpsr_keep)


cat("Replication file 5 done.","\n")

